

<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>pytorch_tabnet.pretraining_utils &mdash; pytorch_tabnet  documentation</title>

  <!-- Stylesheets, in cascade order: theme, syntax highlighting, graphviz, project overrides. -->
  <link rel="stylesheet" href="../../_static/css/theme.css">
  <link rel="stylesheet" href="../../_static/pygments.css">
  <link rel="stylesheet" href="../../_static/graphviz.css">
  <link rel="stylesheet" href="../../_static/./default.css">

  <!-- HTML5 element shim, only fetched by Internet Explorer older than 9. -->
  <!--[if lt IE 9]>
    <script src="../../_static/js/html5shiv.min.js"></script>
  <![endif]-->

  <!-- Classic (non-deferred) scripts; their relative order is kept as-is. -->
  <script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
  <script src="../../_static/jquery.js"></script>
  <script src="../../_static/underscore.js"></script>
  <script src="../../_static/doctools.js"></script>
  <script src="../../_static/language_data.js"></script>
  <script src="../../_static/js/theme.js"></script>

  <!-- Document relations: general index and search page. -->
  <link rel="index" title="Index" href="../../genindex.html">
  <link rel="search" title="Search" href="../../search.html">
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <!-- Sidebar: project home link, search form and the global table of contents. -->
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search">
          <!-- `alt` is not a valid attribute on <a>; the hint is carried by `title` instead. -->
          <a href="../../index.html" class="icon icon-home" title="Documentation Home"> pytorch_tabnet
          </a>

          <div role="search">
            <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
              <!-- A placeholder is not an accessible name, so the field is labelled explicitly. -->
              <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
              <input type="hidden" name="check_keywords" value="yes" />
              <input type="hidden" name="area" value="default" />
            </form>
          </div>
        </div>

        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <p><span class="caption-text">Contents:</span></p>
          <ul>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html">README</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#tabnet-attentive-interpretable-tabular-learning">TabNet : Attentive Interpretable Tabular Learning</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#installation">Installation</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#what-is-new">What is new ?</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#contributing">Contributing</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#what-problems-does-pytorch-tabnet-handle">What problems does pytorch-tabnet handle?</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#how-to-use-it">How to use it?</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#semi-supervised-pre-training">Semi-supervised pre-training</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#data-augmentation-on-the-fly">Data augmentation on the fly</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#easy-saving-and-loading">Easy saving and loading</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/README.html#useful-links">Useful links</a></li>
            <li class="toctree-l1"><a class="reference internal" href="../../generated_docs/pytorch_tabnet.html">pytorch_tabnet package</a></li>
          </ul>
        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        <!-- Top bar: bars icon carrying the `wy-nav-top` toggle hook, then the project home link. -->
        <i class="fa fa-bars" data-toggle="wy-nav-top"></i>
        <a href="../../index.html">pytorch_tabnet</a>
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          















<div role="navigation" aria-label="breadcrumbs navigation">
  <!-- Breadcrumb trail: home icon, "Module code" index, then the current module. -->
  <ul class="wy-breadcrumbs">
    <!-- Icon-only link: aria-label supplies the accessible name the empty anchor lacks. -->
    <li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a> &raquo;</li>
    <li><a href="../index.html">Module code</a> &raquo;</li>
    <li>pytorch_tabnet.pretraining_utils</li>
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>

  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <h1>Source code for pytorch_tabnet.pretraining_utils</h1><div class="highlight"><pre>
<span></span><span class="kn">from</span> <span class="nn">torch.utils.data</span> <span class="kn">import</span> <span class="n">DataLoader</span>
<span class="kn">from</span> <span class="nn">pytorch_tabnet.utils</span> <span class="kn">import</span> <span class="p">(</span>
    <span class="n">create_sampler</span><span class="p">,</span>
    <span class="n">SparsePredictDataset</span><span class="p">,</span>
    <span class="n">PredictDataset</span><span class="p">,</span>
    <span class="n">check_input</span>
<span class="p">)</span>
<span class="kn">import</span> <span class="nn">scipy</span>


<div class="viewcode-block" id="create_dataloaders"><a class="viewcode-back" href="../../generated_docs/pytorch_tabnet.html#pytorch_tabnet.pretraining_utils.create_dataloaders">[docs]</a><span class="k">def</span> <span class="nf">create_dataloaders</span><span class="p">(</span>
    <span class="n">X_train</span><span class="p">,</span> <span class="n">eval_set</span><span class="p">,</span> <span class="n">weights</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">num_workers</span><span class="p">,</span> <span class="n">drop_last</span><span class="p">,</span> <span class="n">pin_memory</span>
<span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Create dataloaders with or without subsampling depending on weights.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    X_train : np.ndarray or scipy.sparse.csr_matrix</span>
<span class="sd">        Training data</span>
<span class="sd">    eval_set : list of np.ndarray or scipy.sparse.csr_matrix</span>
<span class="sd">        List of eval sets</span>
<span class="sd">    weights : either 0, 1, dict or iterable</span>
<span class="sd">        if 0 (default) : no weights will be applied</span>
<span class="sd">        if 1 : classification only, will balance classes with inverse frequency</span>
<span class="sd">        if dict : keys are the corresponding classes, values are the sample weights</span>
<span class="sd">        if iterable : list or np array must be of length equal to nb elements</span>
<span class="sd">                      in the training set</span>
<span class="sd">    batch_size : int</span>
<span class="sd">        how many samples per batch to load</span>
<span class="sd">    num_workers : int</span>
<span class="sd">        how many subprocesses to use for data loading. 0 means that the data</span>
<span class="sd">        will be loaded in the main process</span>
<span class="sd">    drop_last : bool</span>
<span class="sd">        set to True to drop the last incomplete batch, if the dataset size is not</span>
<span class="sd">        divisible by the batch size. If False and the size of dataset is not</span>
<span class="sd">        divisible by the batch size, then the last batch will be smaller</span>
<span class="sd">    pin_memory : bool</span>
<span class="sd">        Whether to pin GPU memory during training</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    train_dataloader, valid_dataloaders : torch.DataLoader, list of torch.DataLoader</span>
<span class="sd">        Training dataloader and one validation dataloader per eval set</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">need_shuffle</span><span class="p">,</span> <span class="n">sampler</span> <span class="o">=</span> <span class="n">create_sampler</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">X_train</span><span class="p">)</span>

    <span class="k">if</span> <span class="n">scipy</span><span class="o">.</span><span class="n">sparse</span><span class="o">.</span><span class="n">issparse</span><span class="p">(</span><span class="n">X_train</span><span class="p">):</span>
        <span class="n">train_dataloader</span> <span class="o">=</span> <span class="n">DataLoader</span><span class="p">(</span>
            <span class="n">SparsePredictDataset</span><span class="p">(</span><span class="n">X_train</span><span class="p">),</span>
            <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span>
            <span class="n">sampler</span><span class="o">=</span><span class="n">sampler</span><span class="p">,</span>
            <span class="n">shuffle</span><span class="o">=</span><span class="n">need_shuffle</span><span class="p">,</span>
            <span class="n">num_workers</span><span class="o">=</span><span class="n">num_workers</span><span class="p">,</span>
            <span class="n">drop_last</span><span class="o">=</span><span class="n">drop_last</span><span class="p">,</span>
            <span class="n">pin_memory</span><span class="o">=</span><span class="n">pin_memory</span><span class="p">,</span>
        <span class="p">)</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="n">train_dataloader</span> <span class="o">=</span> <span class="n">DataLoader</span><span class="p">(</span>
            <span class="n">PredictDataset</span><span class="p">(</span><span class="n">X_train</span><span class="p">),</span>
            <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span>
            <span class="n">sampler</span><span class="o">=</span><span class="n">sampler</span><span class="p">,</span>
            <span class="n">shuffle</span><span class="o">=</span><span class="n">need_shuffle</span><span class="p">,</span>
            <span class="n">num_workers</span><span class="o">=</span><span class="n">num_workers</span><span class="p">,</span>
            <span class="n">drop_last</span><span class="o">=</span><span class="n">drop_last</span><span class="p">,</span>
            <span class="n">pin_memory</span><span class="o">=</span><span class="n">pin_memory</span><span class="p">,</span>
        <span class="p">)</span>

    <span class="n">valid_dataloaders</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="k">for</span> <span class="n">X</span> <span class="ow">in</span> <span class="n">eval_set</span><span class="p">:</span>
        <span class="k">if</span> <span class="n">scipy</span><span class="o">.</span><span class="n">sparse</span><span class="o">.</span><span class="n">issparse</span><span class="p">(</span><span class="n">X</span><span class="p">):</span>
            <span class="n">valid_dataloaders</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
                <span class="n">DataLoader</span><span class="p">(</span>
                    <span class="n">SparsePredictDataset</span><span class="p">(</span><span class="n">X</span><span class="p">),</span>
                    <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span>
                    <span class="n">sampler</span><span class="o">=</span><span class="n">sampler</span><span class="p">,</span>
                    <span class="n">shuffle</span><span class="o">=</span><span class="n">need_shuffle</span><span class="p">,</span>
                    <span class="n">num_workers</span><span class="o">=</span><span class="n">num_workers</span><span class="p">,</span>
                    <span class="n">drop_last</span><span class="o">=</span><span class="n">drop_last</span><span class="p">,</span>
                    <span class="n">pin_memory</span><span class="o">=</span><span class="n">pin_memory</span><span class="p">,</span>
                <span class="p">)</span>
            <span class="p">)</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">valid_dataloaders</span><span class="o">.</span><span class="n">append</span><span class="p">(</span>
                <span class="n">DataLoader</span><span class="p">(</span>
                    <span class="n">PredictDataset</span><span class="p">(</span><span class="n">X</span><span class="p">),</span>
                    <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span>
                    <span class="n">sampler</span><span class="o">=</span><span class="n">sampler</span><span class="p">,</span>
                    <span class="n">shuffle</span><span class="o">=</span><span class="n">need_shuffle</span><span class="p">,</span>
                    <span class="n">num_workers</span><span class="o">=</span><span class="n">num_workers</span><span class="p">,</span>
                    <span class="n">drop_last</span><span class="o">=</span><span class="n">drop_last</span><span class="p">,</span>
                    <span class="n">pin_memory</span><span class="o">=</span><span class="n">pin_memory</span><span class="p">,</span>
                <span class="p">)</span>
            <span class="p">)</span>

    <span class="k">return</span> <span class="n">train_dataloader</span><span class="p">,</span> <span class="n">valid_dataloaders</span></div>


<div class="viewcode-block" id="validate_eval_set"><a class="viewcode-back" href="../../generated_docs/pytorch_tabnet.html#pytorch_tabnet.pretraining_utils.validate_eval_set">[docs]</a><span class="k">def</span> <span class="nf">validate_eval_set</span><span class="p">(</span><span class="n">eval_set</span><span class="p">,</span> <span class="n">eval_name</span><span class="p">,</span> <span class="n">X_train</span><span class="p">):</span>
<span class="w">    </span><span class="sd">&quot;&quot;&quot;Check if the shapes of eval_set are compatible with X_train.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
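<span class="sd">    eval_set : list of np.ndarray</span>
<span class="sd">        The list of evaluation sets.</span>
<span class="sd">        The last one is used for early stopping</span>
<span class="sd">    eval_name : list of str</span>
<span class="sd">        Names of the evaluation sets; when empty or None, names default</span>
<span class="sd">        to val_0, val_1, ...</span>
<span class="sd">    X_train : np.ndarray</span>
<span class="sd">        Training data</span>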

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    eval_names : list of str</span>
<span class="sd">        Validated list of eval_names.</span>

<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">eval_names</span> <span class="o">=</span> <span class="n">eval_name</span> <span class="ow">or</span> <span class="p">[</span><span class="sa">f</span><span class="s2">&quot;val_</span><span class="si">{</span><span class="n">i</span><span class="si">}</span><span class="s2">&quot;</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">eval_set</span><span class="p">))]</span>
    <span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">eval_set</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span>
        <span class="n">eval_names</span>
    <span class="p">),</span> <span class="s2">&quot;eval_set and eval_name have not the same length&quot;</span>

    <span class="k">for</span> <span class="n">set_nb</span><span class="p">,</span> <span class="n">X</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">eval_set</span><span class="p">):</span>
        <span class="n">check_input</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
        <span class="n">msg</span> <span class="o">=</span> <span class="p">(</span>
            <span class="sa">f</span><span class="s2">&quot;Number of columns is different between eval set </span><span class="si">{</span><span class="n">set_nb</span><span class="si">}</span><span class="s2">&quot;</span>
            <span class="o">+</span> <span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2">) and X_train (</span><span class="si">{</span><span class="n">X_train</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s2">)&quot;</span>
        <span class="p">)</span>
        <span class="k">assert</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">X_train</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">msg</span>
    <span class="k">return</span> <span class="n">eval_names</span></div>
</pre></div>

           </div>
           
          </div>
          <footer>
            <hr/>

            <div role="contentinfo">
              <p>
                &copy; Copyright 2019, Dreamquark
              </p>
            </div>

            <!-- Attribution links: https throughout, pointing at the projects' current canonical URLs. -->
            Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
            <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
            provided by <a href="https://readthedocs.org">Read the Docs</a>.
          </footer>

        </div>
      </div>

    </section>

  </div>
  

  <!-- Registers a jQuery ready callback that calls SphinxRtdTheme.Navigation.enable(true);
       both globals come from the scripts loaded in <head>. -->
  <script>
    jQuery(function () {
      SphinxRtdTheme.Navigation.enable(true);
    });
  </script>

  
  
    
   

</body>
</html>